{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "binding-delta",
   "metadata": {
    "papermill": {
     "duration": 0.016304,
     "end_time": "2021-03-22T20:29:23.476444",
     "exception": false,
     "start_time": "2021-03-22T20:29:23.460140",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# ibm_sql_query_cpd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb97e294-9399-4d96-a95c-8ad7e29a2872",
   "metadata": {},
   "source": [
    "Run arbitrary SQL based data transformation jobs on CSV, PARQUET, JSON, AVRO and ORC data stored on Cloud Object Storage using IBM SQL Query. Transformation results are written stored back to Cloud Object Storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "884bff3a-dc51-4c8b-a98f-1d0a8ac1de94",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "#os.environ['create_image']='True'\n",
    "os.environ['repository']='us.icr.io/ai-lifecycle-staging'\n",
    "os.environ['version']='0.64'\n",
    "#\n",
    "#os.environ['install_requirements']='True'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7f19a1a1-9cf2-4cb6-a0d0-859c9de3a525",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[NbConvertApp] Converting notebook ibm-sql-query-cpd.ipynb to script\n",
      "[NbConvertApp] Writing 13698 bytes to ibm-sql-query-cpd.py\n",
      "Sending build context to Docker daemon  304.6kB\n",
      "Step 1/5 : FROM registry.access.redhat.com/ubi8/python-39\n",
      " ---> 54ab5aa3f6b8\n",
      "Step 2/5 : RUN pip install ipython nbformat numpy ibm-cos-sdk-core ibm-cos-sdk ibm-watson-machine-learning ibm-watson-studio-pipelines ibmcloudsql pyyaml\n",
      " ---> Using cache\n",
      " ---> b486348bf686\n",
      "Step 3/5 : ADD ibm-sql-query-cpd.py .\n",
      " ---> 50bbf0c1c143\n",
      "Step 4/5 : ADD start.sh .\n",
      " ---> 2a9b823c7f1d\n",
      "Step 5/5 : RUN echo 0.64 > ./.version\n",
      " ---> Running in 3ad11d3f1544\n",
      "Removing intermediate container 3ad11d3f1544\n",
      " ---> a39bf4fdadb9\n",
      "Successfully built a39bf4fdadb9\n",
      "Successfully tagged ibm_sql_query_cpd:0.64\n",
      "The push refers to repository [us.icr.io/ai-lifecycle-staging/ibm_sql_query_cpd]\n",
      "\n",
      "\u001b[1Ba847a38b: Preparing \n",
      "\u001b[1B22e8833a: Preparing \n",
      "\u001b[1B2032024f: Preparing \n",
      "\u001b[1B9271e776: Preparing \n",
      "\u001b[1Ba0c9fb84: Preparing \n",
      "\u001b[1B12ade9b7: Preparing \n",
      "\u001b[1B45f59bfc: Preparing \n",
      "\u001b[1Bc6aaa811: Preparing \n",
      "\u001b[7B2032024f: Pushed lready exists kB6A\u001b[2K\u001b[9A\u001b[2K\u001b[4A\u001b[2K\u001b[1A\u001b[2K\u001b[8A\u001b[2K\u001b[9A\u001b[2K\u001b[7A\u001b[2K0.64: digest: sha256:6b68c2ea95ada27442fd5f2206c73cf8b10f8ed41eaf67c18d1ac610b35c69cb size: 2208\n"
     ]
    }
   ],
   "source": [
    "if bool(os.environ.get('create_image',False)):\n",
    "    docker_file=\"\"\"\n",
    "    FROM registry.access.redhat.com/ubi8/python-39\n",
    "    RUN pip install ipython nbformat numpy ibm-cos-sdk-core ibm-cos-sdk ibm-watson-machine-learning ibm-watson-studio-pipelines ibmcloudsql pyyaml\n",
    "    ADD ibm-sql-query-cpd.py .\n",
    "    ADD start.sh .\n",
    "    RUN echo {} > ./.version\n",
    "    \"\"\".format(os.environ['version'])\n",
    "    with open(\"Dockerfile\", \"w\") as text_file:\n",
    "        text_file.write(docker_file)\n",
    "\n",
    "    start_file=\"\"\"\n",
    "    #!/bin/bash\n",
    "    echo \"Parameter 1: $1\"\n",
    "    echo \"Parameter 2: $2\"\n",
    "    echo \"Parameter 3: $3\"\n",
    "    echo \"Parameter 4: $4\"\n",
    "    echo \"Parameter 5: $5\"\n",
    "    echo \"Parameter 6: $6\"\n",
    "    echo \"Parameter 7: $7\"\n",
    "    echo \"Parameter 8: $8\"\n",
    "    echo \"Parameter 9: $9\"\n",
    "    echo \"Parameter 10: ${10}\"\n",
    "    echo \"Parameter 11: ${11}\"\n",
    "    echo \"Parameter 12: ${12}\"\n",
    "    echo \"Parameter 13: ${13}\"\n",
    "    echo \"Parameter 14: ${14}\"\n",
    "    echo \"Parameter 15: ${15}\"\n",
    "    echo \"Parameter 16: ${16}\"\n",
    "    echo \"Parameter 17: ${17}\"\n",
    "    echo \"Parameter 18: ${18}\"\n",
    "    echo \"Parameter 19: ${19}\"\n",
    "    echo \"Parameter 20: ${20}\"\n",
    "    echo \"Version\":\n",
    "    cat ./.version\n",
    "    cat /opt/user-token/token\n",
    "    python /opt/app-root/src/ibm-sql-query-cpd.py \"$1$2\" \"$3$4\" \"$5$6\" \"$7$8\" \"$9${10}\" \"${11}${12}\" \"${13}${14}\" \"${15}${16}\" \"${17}${18}\" \"${19}${20}\"\n",
    "    \"\"\"\n",
    "    with open(\"start.sh\", \"w\") as text_file:\n",
    "        text_file.write(start_file)\n",
    "\n",
    "    !chmod 755 start.sh\n",
    "    !jupyter nbconvert --to script ibm-sql-query-cpd.ipynb    \n",
    "    !docker build -t ibm_sql_query_cpd:`echo $version` .\n",
    "    !docker tag ibm_sql_query_cpd:`echo $version` `echo $repository`/ibm_sql_query_cpd:`echo $version`\n",
    "    !docker push `echo $repository`/ibm_sql_query_cpd:`echo $version`\n",
    "    !rm Dockerfile\n",
    "    !rm ibm-sql-query-cpd.py\n",
    "    !rm start.sh\n",
    "elif bool(os.environ.get('install_requirements',False)):\n",
    "    !pip install numpy ibm-cos-sdk-core ibm-cos-sdk ibm-watson-machine-learning ibm-watson-studio-pipelines ibmcloudsql pyyaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e63f153-5524-4c3a-8b55-2afb01f2750b",
   "metadata": {
    "papermill": {
     "duration": 0.164002,
     "end_time": "2021-03-22T20:29:25.951504",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.787502",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import glob\n",
    "import logging\n",
    "import ibmcloudsql\n",
    "from ibmcloudsql import SQLQuery\n",
    "import json\n",
    "import os\n",
    "import shutil\n",
    "import sys\n",
    "import re\n",
    "from ibm_watson_machine_learning import APIClient\n",
    "from ibm_watson_studio_pipelines import WSPipelines\n",
    "from ibm_watson_studio_pipelines.cpd_paths import CpdScope, CpdPath\n",
    "import yaml\n",
    "import ast"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abstract-cambridge",
   "metadata": {
    "papermill": {
     "duration": 0.012801,
     "end_time": "2021-03-22T20:29:25.972462",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.959661",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# COS URL where the results of the SQL job are to be stored\n",
    "target_dir_path = os.environ.get('target_dir_path')\n",
    "\n",
    "# Asset name to register for the results written by the SQL job\n",
    "target_asset_name = os.environ.get('target_asset_name')\n",
    "\n",
    "# sql statement to execute\n",
    "sql = os.environ.get('sql')\n",
    "\n",
    "# (unique) Custom Resource Name (CRN) of IBM SQL Query Service\n",
    "data_engine_crn = os.environ.get('data_engine_crn')\n",
    "\n",
    "# default: CSV - (will be generated into according STORED AS … clause in the INTO clause)\n",
    "format = os.environ.get('format' , 'CSV')\n",
    "\n",
    "# optional, list of columns to use for partitioning the results of the SQL job, will be generated into according PARTITIONED BY (<columns>) clause in the INTO clause)\n",
    "partition_columns = os.environ.get('partition_columns','')\n",
    "\n",
    "# optional, number of objects to store the results of the SQL job in, will be generated into according PARTITIONED INTO <num> OBJECTS clause in INTO clause\n",
    "number_of_objects = int(os.environ.get('number_of_objects', 0))\n",
    "\n",
    "# optional, number of rows to be stored in each result object of the SQL job, will be generated into according PARTITIONED EVERY <num> ROWS clause in INTO clause\n",
    "rows_per_object = int(os.environ.get('rows_per_object', 0))\n",
    "\n",
    "# default: False, only valid when none of the above partitioning option is specified, produces exactly one object with name specified in target_dir_path, twill be generated into sqlClient.rename_exact_result(jobid) after SQL has run.\n",
    "exact_name = os.environ.get('exact_name', 'False')\n",
    "\n",
    "# default: False - will be generated into JOBPREFIX NONE in the INTO clause. Will cause results of previous runs with same output_uri to be overwritten, because no unique sub folder will be created for the result)\n",
    "no_jobid_folder = os.environ.get('no_jobid_folder', 'False')\n",
    "\n",
    "# default: output.txt - output file name containing the CPD path of the resulting asset\n",
    "data_asset = os.environ.get('data_asset','output.txt')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6c2ee10-a8b5-48c9-abb1-2943287b680f",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "token = 'eyJraWQiOiIyMDIyMDYxNTA4MjYiLCJhbGciOiJSUzI1NiJ9.eyJpYW1faWQiOiJJQk1pZC0yNzAwMDI1NzNZIiwiaWQiOiJJQk1pZC0yNzAwMDI1NzNZIiwicmVhbG1pZCI6IklCTWlkIiwic2Vzc2lvbl9pZCI6IkMtMjc5YjNlYzctOGMwYy00YzQzLWJiMWMtMGNjMWY5MWI0OGM5Iiwic2Vzc2lvbl9leHBfbWF4IjoxNjU3MTg2MDQ2LCJzZXNzaW9uX2V4cF9uZXh0IjoxNjU3MTA3Njc3LCJqdGkiOiI3ODQxYzkyZS0yNzY3LTRiYjMtYmU5OC01ZjM0MjQ0NzU5NmEiLCJpZGVudGlmaWVyIjoiMjcwMDAyNTczWSIsImdpdmVuX25hbWUiOiJSb21lbyIsImZhbWlseV9uYW1lIjoiS2llbnpsZXIiLCJuYW1lIjoiUm9tZW8gS2llbnpsZXIiLCJlbWFpbCI6InJvbWVvLmtpZW56bGVyQGNoLmlibS5jb20iLCJzdWIiOiJyb21lby5raWVuemxlckBjaC5pYm0uY29tIiwiYXV0aG4iOnsic3ViIjoicm9tZW8ua2llbnpsZXJAY2guaWJtLmNvbSIsImlhbV9pZCI6IklCTWlkLTI3MDAwMjU3M1kiLCJuYW1lIjoiUm9tZW8gS2llbnpsZXIiLCJnaXZlbl9uYW1lIjoiUm9tZW8iLCJmYW1pbHlfbmFtZSI6IktpZW56bGVyIiwiZW1haWwiOiJyb21lby5raWVuemxlckBjaC5pYm0uY29tIn0sImFjY291bnQiOnsiYm91bmRhcnkiOiJnbG9iYWwiLCJ2YWxpZCI6dHJ1ZSwiYnNzIjoiOWIxM2I4NTdhMzIzNDFiNzE2NzI1NWRlNzE3MTcyZjUiLCJpbXNfdXNlcl9pZCI6IjgwODAyMjIiLCJpbXMiOiIyMDMyNDc0In0sImlhdCI6MTY1NzEwMDQ3NywiZXhwIjoxNjU3MTAxNjc3LCJpc3MiOiJodHRwczovL2lhbS5jbG91ZC5pYm0uY29tL2lkZW50aXR5IiwiZ3JhbnRfdHlwZSI6InVybjppYm06cGFyYW1zOm9hdXRoOmdyYW50LXR5cGU6cGFzc2NvZGUiLCJzY29wZSI6ImlibSBvcGVuaWQiLCJjbGllbnRfaWQiOiJieCIsImFjciI6MSwiYW1yIjpbInB3ZCJdfQ.VicviebFD7b-evDMQVee6MTGIEMW27f3V2VovTvXRbmJ0pwUeSwyFp5fXULi8kahkBJQReTjsk3SVtWEB1e_HAiAqLtdeu_OzejBXX7o2eugEBxsNLrFUSGUr_bgMrb9IhDk9NUTBnpppIkItwrMBMZSGLT-9tLqa1cd9SdAjwaKR7O3A9MXxN7toP8Pyn8KgMxAeFls33uRKStOcE9zHHnhmup6JWVpMvkoY-cuR-_ZMv1Bb85rjU9u79wofnpzEoJ7_VGu-DtDWjchD028fpi_FAXwmYbU-7JHSgnfXUCuU-EBTPv2bv5tGxnrxROvc5yFPIKz4nMOy0sE5YPW0w'\n",
    "target_dir_path='cpd:///projects/e0bce158-a9e4-4be6-a1da-20a04a7770f5/connections/de544f56-05c0-4dda-9e00-06458052c467/files/cos-rkie-sql-engine-test/sql_results'\n",
    "sql='SELECT * FROM cos://eu-de/claimed-test/data.parquet stored as parquet'\n",
    "sql_query_crn='crn:v1:bluemix:public:sql-query:us-south:a/9b13b857a32341b7167255de717172f5:f9dd6c9e-b24b-4506-819e-e038c92339e4::'\n",
    "target_location='cos://eu-de/claimed-test-results/data.parquet'\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55f9b39f-2c8c-4ab5-b4f5-513357bf20ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "for element in sys.argv:\n",
    "    logging.warning('argv raw ' +  element)\n",
    "\n",
    "parameters = list(\n",
    "    map(lambda s: re.sub('$', '\"', s),\n",
    "        map(\n",
    "            lambda s: s.replace('=', '=\"'),\n",
    "            filter(\n",
    "                lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n",
    "                sys.argv\n",
    "            )\n",
    "    )))\n",
    "\n",
    "\n",
    "for parameter in parameters:\n",
    "    exec(parameter)\n",
    "    logging.warning('Parameter: ' + parameter)\n",
    "\n",
    "exact_name = ast.literal_eval(exact_name.capitalize())\n",
    "no_jobid_folder = ast.literal_eval(no_jobid_folder.capitalize())\n",
    "number_of_objects = number_of_objects if type(number_of_objects)==int else int(number_of_objects) if len(number_of_objects)>0 else 0\n",
    "rows_per_object = rows_per_object if type(rows_per_object)==int else int(rows_per_object) if len(rows_per_object)>0 else 0\n",
    "\n",
    "\n",
    "for parameter in parameters:\n",
    "    exec(\"logging.warning('final parameter: ' + str({}))\".format(parameter.split('=')[0]))\n",
    "    exec(\"logging.warning('final parameter type: ' + str(type({})))\".format(parameter.split('=')[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0e337eb-1650-47a9-9461-a650ed34ecb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/opt/user-token/token', 'r') as file:\n",
    "    token = file.read().rstrip()\n",
    "\n",
    "    logging.warning('Token: ' + token)\n",
    "    logging.info('TokenI: ' + token)\n",
    "    \n",
    "logging.warning('Token: ' + token)\n",
    "logging.info('TokenIO: ' + token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0484cf13-6f14-4a6c-a7f8-bcfb4b966f33",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(os.environ.get('CPDCONFIG')) as file:\n",
    "    cpd_config = yaml.load(file, Loader=yaml.FullLoader)\n",
    "    for service in cpd_config['services']:\n",
    "        if service['type'] == 'watson-studio':\n",
    "            cpdaas_url = service['service']['url']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b2d800c-4824-4bdc-8140-3f1fcbb8346a",
   "metadata": {},
   "outputs": [],
   "source": [
    "target_dir_path_object = CpdPath.from_string(target_dir_path)\n",
    "logging.warning('target_dir_path_object: {}'.format(target_dir_path_object))\n",
    "\n",
    "project_id = target_dir_path_object.scope_id()\n",
    "logging.warning('project_id: {}'.format(project_id))\n",
    "\n",
    "connection_id = target_dir_path_object.resource_id()\n",
    "logging.warning('connection_id: {}'.format(connection_id))\n",
    "\n",
    "#bucket_name = target_dir_path_object.bucket_name()\n",
    "#logging.warning('bucket_name: {}'.format(bucket_name))\n",
    "\n",
    "dir_path = target_dir_path_object.file_path()\n",
    "logging.warning('dir_path: {}'.format(dir_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e17f5f65-6fc4-453d-b65f-768dc72d0a81",
   "metadata": {},
   "outputs": [],
   "source": [
    "pipelines_client = WSPipelines.from_token(token, url=cpdaas_url)\n",
    "wml_credentials = pipelines_client.get_wml_credentials(target_dir_path_object.scope())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32bf55bd-9da0-4ec2-934c-c38d73994d92",
   "metadata": {},
   "outputs": [],
   "source": [
    "wml_credentials['token'] = token\n",
    "wml_client = APIClient(wml_credentials)\n",
    "wml_client.set.default_project(project_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb8b6b57-5583-4906-ab2e-877ff6346e7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "wml_client.connections.get_details(connection_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8e06d6c-3926-4932-80eb-62aa52342945",
   "metadata": {},
   "outputs": [],
   "source": [
    "cos_props = wml_client.connections.get_details(connection_id)\n",
    "\n",
    "logging.warning('cos_props')\n",
    "logging.warning(cos_props)\n",
    "\n",
    "logging.warning(type(cos_props))\n",
    "bucket_name = cos_props['entity']['properties']['bucket']\n",
    "\n",
    "logging.warning('bucket_name: {}'.format(bucket_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42dd04db-ddb1-4e5e-927c-d00e4e8eeae8",
   "metadata": {},
   "outputs": [],
   "source": [
    "cos_props = cos_props['entity']['properties']\n",
    "url = cos_props['url'].split('//')[1]\n",
    "bucket = cos_props['bucket']\n",
    "logging.warning(cos_props)\n",
    "#cos_props = cos_props['credentials']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dcd91761-b5b4-4631-b3c8-5ab33358c8e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "import json\n",
    "url = 'dummy'\n",
    "bucket = 'dummy'\n",
    "\n",
    "cos_props = '''\n",
    "{\n",
    "  \"apikey\": \"sdsd\",\n",
    "  \"cos_hmac_keys\": {\n",
    "    \"access_key_id\": \"sdsdsdsdf3\",\n",
    "    \"secret_access_key\": \"sdsfsdf\"\n",
    "  },\n",
    "  \"endpoints\": \"https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints\",\n",
    "  \"iam_apikey_description\": \"Auto-generated for key crn:v1:bluemix:public:cloud-object-storage:global:a/9b13b857a32341b7167255de717172f5:24451914-8910-498e-8279-1851c5dd0328:resource-key:315158d1-a7a4-432f-9e11-c6e245e5011e\",\n",
    "  \"iam_apikey_name\": \"viewer-ed22e594-7a2f-4cda-95af-bc7ca806a7b6\",\n",
    "  \"iam_role_crn\": \"crn:v1:bluemix:public:iam::::serviceRole:Writer\",\n",
    "  \"iam_serviceid_crn\": \"crn:v1:bluemix:public:iam-identity::a/9b13b857a32341b7167255de717172f5::serviceid:ServiceId-9d606823-b14e-4633-bc49-8eba77ec3d7f\",\n",
    "  \"resource_instance_id\": \"crn:v1:bluemix:public:cloud-object-storage:global:a/9b13b857a32341b7167255de717172f5:24451914-8910-498e-8279-1851c5dd0328::\"\n",
    "}\n",
    "'''\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a1cfb31-384f-4dda-a6c5-91fb94fea326",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "import json\n",
    "url = 'dummy'\n",
    "bucket = 'dummy'\n",
    "cos_props = '''\n",
    "{\n",
    "    \"name\": \"torstensbucket\",\n",
    "    \"origin_country\": \"us\",\n",
    "    \"owner_id\": \"IBMid-120000ETQ5\",\n",
    "    \"properties\": {\n",
    "      \"api_key\": \"sdfsdf\",\n",
    "      \"auth_method\": \"instanceid_apikey\",\n",
    "      \"bucket\": \"torstensbucket\",\n",
    "      \"iam_url\": \"https://iam.test.cloud.ibm.com/identity/token\",\n",
    "      \"resource_instance_id\": \"crn:v1:staging:public:cloud-object-storage:global:a/bb287b72124612756cf16d897c8c18e5:b273ae5e-c945-4aec-a962-12a5e7d36493::\",\n",
    "      \"url\": \"https://s3.us-west.cloud-object-storage.test.appdomain.cloud\"\n",
    "    }\n",
    "}\n",
    "'''\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6093b45-ba24-4af0-8442-e8eaa2cc48b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# cos_props = json.loads(cos_props)\n",
    "if 'api_key' in  cos_props:\n",
    "    api_key = cos_props['api_key']\n",
    "elif 'properties' in  cos_props:\n",
    "    api_key = cos_props['properties']['api_key']\n",
    "else:\n",
    "    assert(False, \"Can't handle this\")\n",
    "target_location = \"cos://{}/{}/{}\".format(url,bucket, target_asset_name)\n",
    "logging.warning(target_location)\n",
    "logging.warning(api_key)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e33c170-e7a6-481e-babd-46b8b8c425ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "#api_key = 'jtbo_VatEv-Fl5GwPj678DId23iOPx4f6TFdNPxbbMrP'\n",
    "#sql_query_crn = 'crn:v1:bluemix:public:sql-query:us-south:a/9b13b857a32341b7167255de717172f5:cd3e149d-e183-4162-a392-50bab6c64f47::'\n",
    "logging.warning('instatiating sql query object with token {} and crn {}'.format(token,data_engine_crn))\n",
    "\n",
    "sqlClient = SQLQuery(api_key=None, token=token, instance_crn=data_engine_crn)\n",
    "\n",
    "logging.warning('done')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "welsh-grave",
   "metadata": {
    "papermill": {
     "duration": 4.178678,
     "end_time": "2021-03-22T20:29:30.176328",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.997650",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "sql = sql + ' INTO {}'.format(target_location)\n",
    "\n",
    "\n",
    "\n",
    "if no_jobid_folder:\n",
    "    sql = sql + ' JOBPREFIX NONE'\n",
    "\n",
    "sql = sql + ' STORED AS {}'.format(format)\n",
    "\n",
    "\n",
    "if len(partition_columns) > 0 or number_of_objects > 0 or rows_per_object > 0:\n",
    "    sql = sql + ' PARTITIONED '\n",
    "\n",
    "if len(partition_columns) > 0:\n",
    "    sql = sql + ' BY ({})'.format(partition_columns)\n",
    "\n",
    "if number_of_objects > 0:\n",
    "    sql = sql + ' INTO {} OBJECTS'.format(number_of_objects)\n",
    "\n",
    "if rows_per_object > 0:\n",
    "    sql = sql + ' EVERY {} ROWS'.format(rows_per_object)\n",
    "\n",
    "    \n",
    "print(sql)\n",
    "\n",
    "job_id = sqlClient.submit_sql(sql)\n",
    "sqlClient.wait_for_job(job_id)\n",
    "job_details = sqlClient.get_job(job_id)\n",
    "job_status = job_details['status']\n",
    "print(\"Job \" + job_id + \" terminated with status: \" + job_status)\n",
    "if job_status == 'completed':\n",
    "    if exact_name:\n",
    "        sqlClient.rename_exact_result(job_id)\n",
    "        print(\"Job results written to single result object  \" + job_details['resultset_location'])\n",
    "    else:\n",
    "        print(\"Job results written to folder \" + job_details['resultset_location'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7081e1f-3066-4a94-914c-cc59bfb4021b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4c5db2c-d39b-4de3-a7da-3d09bf9239e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata = {\n",
    "  wml_client.data_assets.ConfigurationMetaNames.NAME: target_asset_name,\n",
    "  wml_client.data_assets.ConfigurationMetaNames.DESCRIPTION: 'Data asset created for SQL Query result',\n",
    "  wml_client.data_assets.ConfigurationMetaNames.CONNECTION_ID: connection_id,\n",
    "  wml_client.data_assets.ConfigurationMetaNames.DATA_CONTENT_NAME: dir_path\n",
    "}\n",
    "asset_details = wml_client.data_assets.store(meta_props=metadata)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2658bebc-c136-43c7-8e8d-bfd4b71afdad",
   "metadata": {},
   "outputs": [],
   "source": [
    "new_cpd_path = target_dir_path.split('connections')[0]+'assets/'+asset_details['metadata']['asset_id']\n",
    "with open(data_asset,'w') as fout:\n",
    "    fout.write(new_cpd_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 470.538548,
   "end_time": "2021-03-22T20:37:13.369954",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb",
   "output_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb",
   "parameters": {},
   "start_time": "2021-03-22T20:29:22.831406",
   "version": "2.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
